In [135]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing

from data_load import get_clean_data,normalize_1_variables,normalize_2_variables,normalize_3_variables
In [189]:
sns.set(style="whitegrid")
sns.set(style="ticks", color_codes=True)
sns.set(font_scale=2)

%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

# another cell
import matplotlib.pyplot as plt
%matplotlib inline


for i in range(10):
    plt.plot(range(10))
    plt.show()
UsageError: Line magic function `%%javascript` not found.
In [137]:
data = get_clean_data('quiz_data.csv',True)
print(data.columns)
Index(['program', 'happy', 'problem_type', 'creative', 'industry', 'outdoors',
       'career', 'group_work', 'liked_courses', 'disliked_courses',
       'programming', 'join_clubs', 'not_clubs', 'liked_projects',
       'disliked_projects', 'tv_shows', 'alternate_degree',
       'expensive_equipment', 'drawing', 'essay'],
      dtype='object')

Summary of the data

In [106]:
print("Summary of the variable: program")
fig, axs = plt.subplots(1,2)
fig.suptitle('program')

data['program'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['program'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: program
Out[106]:
<matplotlib.axes._subplots.AxesSubplot at 0x121203c50>
In [107]:
print("Summary of the variable: happy")
fig, axs = plt.subplots(1,2)
fig.suptitle('happy')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: happy
Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x120cc4588>
In [108]:
print("Summary of the variable: problem_type")
fig, axs = plt.subplots(1,2)
fig.suptitle('problem_type')

data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: problem_type
Out[108]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ec70518>
In [168]:
print("Summary of the variable: creative")
fig, axs = plt.subplots(1,2)
fig.suptitle('creative')

data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: creative
Out[168]:
<matplotlib.axes._subplots.AxesSubplot at 0x12b2aa2b0>
In [215]:
# print("Summary of the variable: industry")
# fig, axs = plt.subplots(1,2)
# fig.suptitle('industry')

# data['industry'].value_counts().plot(kind='bar',
#                                     figsize=(14,8),
#                                     title="Frequency",
#                                     ax=axs[0])
# (data['industry'].value_counts(normalize=True) * 100).plot(kind='bar',
#                                     figsize=(14,8),
#                                     title="Percent",
#                                     ax=axs[1])
In [111]:
print("Summary of the variable: outdoors")
fig, axs = plt.subplots(1,2)
fig.suptitle('outdoors')

data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: outdoors
Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x121a41438>
In [112]:
print("Summary of the variable: career")
fig, axs = plt.subplots(1,2)
fig.suptitle('career')

data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: career
Out[112]:
<matplotlib.axes._subplots.AxesSubplot at 0x122a4a4e0>
In [113]:
print("Summary of the variable: group_work")
fig, axs = plt.subplots(1,2)
fig.suptitle('group_work')

data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: group_work
Out[113]:
<matplotlib.axes._subplots.AxesSubplot at 0x120fbd3c8>
In [114]:
print("Summary of the variable: liked_courses")
fig, axs = plt.subplots(1,2)
fig.suptitle('liked_courses')

data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: liked_courses
Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x1205a6940>
In [115]:
print("Summary of the variable: disliked_courses")
fig, axs = plt.subplots(1,2)
fig.suptitle('disliked_courses')

data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: disliked_courses
Out[115]:
<matplotlib.axes._subplots.AxesSubplot at 0x120508ba8>
In [116]:
print("Summary of the variable: programming")
fig, axs = plt.subplots(1,2)
fig.suptitle('programming')

data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: programming
Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x121097d68>
In [117]:
print("Summary of the variable: join_clubs")
fig, axs = plt.subplots(1,2)
fig.suptitle('join_clubs')

data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: join_clubs
Out[117]:
<matplotlib.axes._subplots.AxesSubplot at 0x1203c4d68>
In [118]:
print("Summary of the variable: not_clubs")
fig, axs = plt.subplots(1,2)
fig.suptitle('not_clubs')

data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: not_clubs
Out[118]:
<matplotlib.axes._subplots.AxesSubplot at 0x126444908>
In [119]:
print("Summary of the variable: liked_projects")
fig, axs = plt.subplots(1,2)
fig.suptitle('liked_projects')

data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: liked_projects
Out[119]:
<matplotlib.axes._subplots.AxesSubplot at 0x12228f978>
In [120]:
print("Summary of the variable: disliked_projects")
fig, axs = plt.subplots(1,2)
fig.suptitle('disliked_projects')

data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: disliked_projects
Out[120]:
<matplotlib.axes._subplots.AxesSubplot at 0x121a91780>
In [121]:
print("Summary of the variable: tv_shows")
fig, axs = plt.subplots(1,2)
fig.suptitle('tv_shows')

data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: tv_shows
Out[121]:
<matplotlib.axes._subplots.AxesSubplot at 0x1262a6c50>
In [122]:
print("Summary of the variable: alternate_degree")
fig, axs = plt.subplots(1,2)
fig.suptitle('alternate_degree')

data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: alternate_degree
Out[122]:
<matplotlib.axes._subplots.AxesSubplot at 0x120236ac8>
In [123]:
print("Summary of the variable: expensive_equipment")
fig, axs = plt.subplots(1,2)
fig.suptitle('expensive_equipment')

data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: expensive_equipment
Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x1221c8400>
In [124]:
print("Summary of the variable: drawing")
fig, axs = plt.subplots(1,2)
fig.suptitle('drawing')

data['drawing'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['drawing'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: drawing
Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x1264602e8>
In [125]:
print("Summary of the variable: essay")
fig, axs = plt.subplots(1,2)
fig.suptitle('essay')

data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="Frequency",
                                    ax=axs[0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="Percent",
                                    ax=axs[1])
Summary of the variable: essay
Out[125]:
<matplotlib.axes._subplots.AxesSubplot at 0x120655c50>

Summary of data by program

In [194]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: mech")
data =  data[data.program=='mech']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('mech')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: mech
In [195]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: bmed")
data =  data[data.program=='bmed']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('bmed')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: bmed
In [196]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: sft")
data =  data[data.program=='sft']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('sft')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: sft
In [197]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: ce")
data =  data[data.program=='ce']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('ce')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: ce
In [198]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: tron")
data =  data[data.program=='tron']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('tron')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: tron
In [199]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: cive")
data =  data[data.program=='cive']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('cive')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: cive
In [200]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: chem")
data =  data[data.program=='chem']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('chem')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: chem
In [201]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: syde")
data =  data[data.program=='syde']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('syde')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: syde
In [202]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: msci")
data =  data[data.program=='msci']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('msci')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: msci
In [203]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: elec")
data =  data[data.program=='elec']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('elec')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: elec
In [204]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: nano")
data =  data[data.program=='nano']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('nano')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: nano
In [205]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: geo")
data =  data[data.program=='geo']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('geo')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: geo
In [206]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: env")
data =  data[data.program=='env']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('env')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: env
In [211]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: arch-e")
data =  data[data.program=='arch-e']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('arch-e')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: arch-e
In [208]:
data = get_clean_data('quiz_data.csv',True) 
print("Summary of the Program: arch")
data =  data[data.program=='arch']
fig, axes = plt.subplots(nrows=17, ncols=2)
fig.suptitle('arch')

data['happy'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyFrequency",
                                    ax=axes[0][0])
(data['happy'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="happyPercent",
                                    ax=axes[0][1])
data['problem_type'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typeFrequency",
                                    ax=axes[1][0])
(data['problem_type'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="problem_typePercent",
                                    ax=axes[1][1])
data['creative'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativeFrequency",
                                    ax=axes[2][0])
(data['creative'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="creativePercent",
                                    ax=axes[2][1])
data['outdoors'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsFrequency",
                                    ax=axes[3][0])
(data['outdoors'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="outdoorsPercent",
                                    ax=axes[3][1])
data['career'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerFrequency",
                                    ax=axes[4][0])
(data['career'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="careerPercent",
                                    ax=axes[4][1])
data['group_work'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workFrequency",
                                    ax=axes[5][0])
(data['group_work'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="group_workPercent",
                                    ax=axes[5][1])
data['liked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesFrequency",
                                    ax=axes[6][0])
(data['liked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_coursesPercent",
                                    ax=axes[6][1])
data['disliked_courses'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesFrequency",
                                    ax=axes[7][0])
(data['disliked_courses'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_coursesPercent",
                                    ax=axes[7][1])
data['programming'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingFrequency",
                                    ax=axes[8][0])
(data['programming'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="programmingPercent",
                                    ax=axes[8][1]) 
data['join_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsFrequency",
                                    ax=axes[9][0])
(data['join_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="join_clubsPercent",
                                    ax=axes[9][1])
data['not_clubs'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsFrequency",
                                    ax=axes[10][0])
(data['not_clubs'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="not_clubsPercent",
                                    ax=axes[10][1]) 
data['liked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsFrequency",
                                    ax=axes[11][0])
(data['liked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="liked_projectsPercent",
                                    ax=axes[11][1])
data['disliked_projects'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsFrequency",
                                    ax=axes[12][0])
(data['disliked_projects'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="disliked_projectsPercent",
                                    ax=axes[12][1])  
data['tv_shows'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsFrequency",
                                    ax=axes[13][0])
(data['tv_shows'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="tv_showsPercent",
                                    ax=axes[13][1])
data['alternate_degree'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreeFrequency",
                                    ax=axes[14][0])
(data['alternate_degree'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="alternate_degreePercent",
                                    ax=axes[14][1])  
data['expensive_equipment'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentFrequency",
                                    ax=axes[15][0])
(data['expensive_equipment'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="expensive_equipmentPercent",
                                    ax=axes[15][1])  
data['essay'].value_counts().plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayFrequency",
                                    ax=axes[16][0])
(data['essay'].value_counts(normalize=True) * 100).plot(kind='bar',
                                    figsize=(14,8),
                                    title="essayPercent",
                                    ax=axes[16][1])


top = 10      # the top of the subplots of the figure
hspace = 1.2  # the amount of height reserved for white space between subplots
plt.subplots_adjust(top=top,hspace=hspace)

data = get_clean_data('quiz_data.csv',True) 
Summary of the Program: arch

Bar Plots Mapping Programs Against Each Variable

In [53]:
print("program vs. happy")
plot_data = normalize_1_variables(data,"program","happy")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="happy",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. happy
In [54]:
print("program vs. problem_type")
plot_data = normalize_1_variables(data,"program","problem_type")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="problem_type",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. problem_type
In [55]:
print("program vs. creative")
plot_data = normalize_1_variables(data,"program","creative")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="creative",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. creative
In [213]:
# print("program vs. industry")
# plot_data = normalize_1_variables(data,"program","industry")
# g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="industry",
#             data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
# g.set_xlabels('')
# g.set_ylabels('percent')

# for ax in g.axes:
#     plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
# plt.subplots_adjust(hspace=0.3)
# plt.show()
In [59]:
print("program vs. outdoors")
plot_data = normalize_1_variables(data,"program","outdoors")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="outdoors",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. outdoors
In [60]:
print("program vs. career")
plot_data = normalize_1_variables(data,"program","career")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="career",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. career
In [61]:
print("program vs. group_work")
plot_data = normalize_1_variables(data,"program","group_work")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="group_work",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. group_work
In [62]:
print("program vs. liked_courses")
plot_data = normalize_1_variables(data,"program","liked_courses")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="liked_courses",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. liked_courses
In [63]:
print("program vs. disliked_courses")
plot_data = normalize_1_variables(data,"program","disliked_courses")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="disliked_courses",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. disliked_courses
In [64]:
print("program vs. programming")
plot_data = normalize_1_variables(data,"program","programming")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="programming",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. programming
In [65]:
print("program vs. join_clubs")
plot_data = normalize_1_variables(data,"program","join_clubs")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="join_clubs",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. join_clubs
In [66]:
print("program vs. not_clubs")
plot_data = normalize_1_variables(data,"program","not_clubs")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="not_clubs",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. not_clubs
In [67]:
print("program vs. liked_projects")
plot_data = normalize_1_variables(data,"program","liked_projects")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="liked_projects",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. liked_projects
In [68]:
print("program vs. disliked_projects")
plot_data = normalize_1_variables(data,"program","disliked_projects")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="disliked_projects",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. disliked_projects
In [69]:
print("program vs. tv_shows")
plot_data = normalize_1_variables(data,"program","tv_shows")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="tv_shows",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. tv_shows
In [70]:
print("program vs. alternate_degree")
plot_data = normalize_1_variables(data,"program","alternate_degree")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="alternate_degree",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. alternate_degree
In [71]:
print("program vs. expensive_equipment")
plot_data = normalize_1_variables(data,"program","expensive_equipment")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="expensive_equipment",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. expensive_equipment
In [72]:
print("program vs. drawing")
plot_data = normalize_1_variables(data,"program","drawing")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="drawing",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. drawing
In [73]:
print("program vs. essay")
plot_data = normalize_1_variables(data,"program","essay")
g = sns.catplot(y="percent",x="program",order=data.program.unique(),col="essay",
            data=plot_data,kind='bar',height=10,aspect=2.5,col_wrap=1,margin_titles=True)
g.set_xlabels('')
g.set_ylabels('percent')

for ax in g.axes:
    plt.setp(ax.get_xticklabels(), visible=True, rotation=45,label='big')
plt.subplots_adjust(hspace=0.3)
plt.show()
program vs. essay

Delete later, below is a playground to test ideas with the golf data

In [101]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from data_load import get_encoded_data
import json
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pickle

directory = 'golf_data.csv'
df = pd.read_csv(directory,dtype=str)
df = df.drop(df.columns[[0,6]], axis=1)

print(df.head())
    OUTLOOK TEMPERATURE HUMIDITY  WINDY PLAY
0     Rainy         Hot     High  FALSE   No
1     Rainy         Hot     High   TRUE   No
2  Overcast         Hot     High  FALSE  Yes
3     Sunny        Mild     High  FALSE  Yes
4     Sunny        Cool   Normal  FALSE  Yes
In [102]:
outlook = {'Rainy': 1, 'Overcast': 0, 'Sunny': 2, 'column': 'OUTLOOK'}
temperature = {'Hot': 1, 'Mild': 2, 'Cool': 0, 'column': 'TEMPERATURE'}
humidity = {'High': 0, 'Normal': 1, 'column': 'HUMIDITY'}
windy = {'FALSE': 0, 'TRUE': 1, 'column': 'WINDY'}
In [103]:
col_list = list(df.columns)
encoded_dict_list = []
for col in col_list:
    keys = df[col].unique()
    le = preprocessing.LabelEncoder()
    le.fit(list(keys))
    df[col] = le.transform(list(df[col]))
    vals = df[col].unique()
    keys = list(le.inverse_transform(vals))
    cd = dict(zip(keys,vals))
    cd['column'] = col
    encoded_dict_list.append(cd)
print(encoded_dict_list)
print(df.head())
[{'Rainy': 1, 'Overcast': 0, 'Sunny': 2, 'column': 'OUTLOOK'}, {'Hot': 1, 'Mild': 2, 'Cool': 0, 'column': 'TEMPERATURE'}, {'High': 0, 'Normal': 1, 'column': 'HUMIDITY'}, {'FALSE': 0, 'TRUE': 1, 'column': 'WINDY'}, {'No': 0, 'Yes': 1, 'column': 'PLAY'}]
   OUTLOOK  TEMPERATURE  HUMIDITY  WINDY  PLAY
0        1            1         0      0     0
1        1            1         0      1     0
2        0            1         0      0     1
3        2            2         0      0     1
4        2            0         1      0     1
In [104]:
x_df = df.drop(axis=1,columns=["PLAY"])
y_df = df["PLAY"]

X = np.array(x_df) # convert dataframe into np array
y = np.array(y_df) # convert dataframe into np array

mnb = MultinomialNB()
model = mnb.fit(x_df, y_df) # fit the model using training data

cat = df.drop('PLAY',axis=1)
index_dict = dict(zip(cat.columns,range(cat.shape[1])))
In [105]:
with open('nb_model.pkl', 'wb') as fid:
    pickle.dump(model, fid,2)

'''
We need to create our feature vector of exact same dimension as our training set. To convert our user input into dummy variables, we should save a dict of the the dummy variables. Later we can populate our feature vector for prediction using this dict.
'''

with open('cat', 'wb') as fid:
    pickle.dump(index_dict, fid,2)
In [122]:
post_dict = {
'OUTLOOK':'Overcast',
'TEMPERATURE':'Cool',
'HUMIDITY':'Normal',
'WINDY':'FALSE'
}
In [123]:
new_vector = y
new_vector[0] = outlook[post_dict['OUTLOOK']]
new_vector[1] = temperature[post_dict['TEMPERATURE']]
new_vector[2] = humidity[post_dict['HUMIDITY']]
new_vector[3] = windy[post_dict['WINDY']]

new_vector = [new_vector[0:4]]
print(new_vector)

print("Loading model")
pkl_file = open('nb_model.pkl', 'rb')

nb_model = pickle.load(pkl_file)
[array([0, 0, 1, 0])]
Loading model
In [124]:
prediction = nb_model.predict(new_vector)

if prediction == 0:
    response_message  = 'You should not play golf today'
    rm = 'NO'
else:
    response_message = 'You could play golf today'
    rm = 'YES'
print(rm)
YES
In [125]:
prediction = nb_model.predict_proba(new_vector)
print(prediction)
# First index is probability of no, second index is probability of yes
[[0.12912819 0.87087181]]
In [111]:
prediction = nb_model.predict_log_proba(new_vector)
print(prediction)
[[-0.61921679 -0.77298275]]
In [212]:
# Toggle Code
import ipywidgets as widgets
from IPython.display import display, HTML

javascript_functions = {False: "hide()", True: "show()"}
button_descriptions  = {False: "Show code", True: "Hide code"}


def toggle_code(state):

    """
    Toggles the JavaScript show()/hide() function on the div.input element.
    """

    output_string = "<script>$(\"div.input\").{}</script>"
    output_args   = (javascript_functions[state],)
    output        = output_string.format(*output_args)

    display(HTML(output))


def button_action(value):

    """
    Calls the toggle_code function and updates the button description.
    """

    state = value.new

    toggle_code(state)

    value.owner.description = button_descriptions[state]


state = False
toggle_code(state)

button = widgets.ToggleButton(state, description = button_descriptions[state])
button.observe(button_action, "value")

display(button)
In [ ]: